Code
library(tidyverse)
library(arrow)
library(splines)
library(corrr)
library(scales)
library(FactoMineR)
library(factoextra)
library(glue)
theme_set(theme_bw() + theme(legend.position = "top"))Exploratory Data Analysis 1
library(tidyverse)
library(arrow)
library(splines)
library(corrr)
library(scales)
library(FactoMineR)
library(factoextra)
library(glue)
theme_set(theme_bw() + theme(legend.position = "top"))country que tiene el país, esto lo hago con la finalidad de evaluar patrones de comportamiento por diferentes países y porque podría ser de ayuda para extrapolar a regiones donde no tenemos registros históricos de floración. Para diferenciar los datos de NPN para USA, los etiqueto en la variable country como USA-NPN y a los de Washington DC como USA-WDC.USA-WDC porque sólo es una coordenada (lat=38.88535, long=-77.03863) y tampoco se muestran los resultados de USA-NPN, la razón es que las variables bioclimáticas no están para las coordenadas de USA-NPN, sin embargo, como información de clima fue suministrada por NPN, uso esta información para análisis exploratorio al final de este documento y también en el próximo (06-EDA2-FE.qmd).df_full <- read_parquet("../external-data/df_full.parquet") |>
mutate(
country =
case_when(
str_detect(location, "Japan") ~ "Japan",
str_detect(location, "kyoto") ~ "Japan",
str_detect(location, "liestal") ~ "Switzerland",
str_detect(location, "Switzerland") ~ "Switzerland",
str_detect(location, "South Korea") ~ "South Korea",
str_detect(location, "vancouver") ~ "Canada",
str_detect(location, "washingtondc") ~ "USA-WDC",
str_detect(location, "NPN") ~ "USA-NPN"
)
) |>
relocate(location, country, everything())df_full |>
filter(location != "NPN") |>
ggplot(aes(x = bloom_doy)) +
facet_wrap(~country, ncol = 1, scales = "free_y") +
geom_histogram(color = "black") +
geom_rug() +
labs(x = "Bloom DOY", y = "Count",
title = "DOY distribution by country",
subtitle = "Original scale")
df_full |>
filter(location != "NPN") |>
ggplot(aes(x = bloom_doy)) +
facet_wrap(~country, ncol = 1, scales = "free_y") +
geom_histogram(color = "black") +
geom_rug() +
scale_x_log10() +
labs(x = "Bloom DOY", y = "Count",
title = "DOY distribution by country",
subtitle = "Logarithmic scale")df_full |>
filter(country == "Japan") |>
select(-c(location, country, year, bloom_date)) |>
pivot_longer(cols = -bloom_doy) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point(size = 0.5, alpha = 0.25) +
stat_bin_2d(aes(fill = ..density..),
geom = "raster",
contour = FALSE,
show.legend = FALSE) +
scale_fill_distiller(palette = 4, direction = -1) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "firebrick3",
se = FALSE) +
scale_x_log10() +
labs(y = "DOY", x = "", title = "Japan")df_full |>
filter(country == "Switzerland") |>
select(-c(location, country, year, bloom_date)) |>
pivot_longer(cols = -bloom_doy) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point(size = 0.5, alpha = 0.25) +
stat_bin_2d(aes(fill = ..density..),
geom = "raster",
contour = FALSE,
show.legend = FALSE) +
scale_fill_distiller(palette = 4, direction = -1) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "firebrick3",
se = FALSE) +
scale_x_log10() +
labs(y = "DOY", x = "", title = "Switzerland")df_full |>
filter(country == "South Korea") |>
select(-c(location, country, year, bloom_date)) |>
pivot_longer(cols = -bloom_doy) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point(size = 0.5, alpha = 0.25) +
stat_bin_2d(aes(fill = ..density..),
geom = "raster",
contour = FALSE,
show.legend = FALSE) +
scale_fill_distiller(palette = 4, direction = -1) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "firebrick3",
se = FALSE) +
scale_x_log10() +
labs(y = "DOY", x = "", title = "South Korea")df_full |>
filter(location != "NPN") |>
select(-c(location, country, year, bloom_date)) |>
pivot_longer(cols = -bloom_doy) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point(size = 0.5, alpha = 0.25) +
stat_bin_2d(aes(fill = ..density..),
geom = "raster",
contour = FALSE,
show.legend = FALSE) +
scale_fill_distiller(palette = 4, direction = -1) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "firebrick3",
se = FALSE) +
scale_x_log10() +
labs(y = "DOY", x = "", title = "All the countries")df_full |>
filter(country == "Japan") |>
select(where(is.numeric), -c(shrubs)) |>
correlate(method = "spearman") |>
filter(term == "bloom_doy") |>
pivot_longer(cols = -term) |>
filter(!is.na(value)) |>
mutate(
name = str_replace_all(
name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"
),
name = str_replace_all(
name,
"wildareas-v3-2009-human-footprint",
"human-footprint09"
)
) |>
ggplot(aes(
x = reorder(name, value),
y = term,
fill = value
)) +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
labs(x = "",
y = "",
fill = "",
title = "Japan") +
coord_flip()
df_full |>
filter(country == "Switzerland") |>
select(where(is.numeric), -c(shrubs)) |>
correlate(method = "spearman") |>
filter(term == "bloom_doy") |>
pivot_longer(cols = -term) |>
filter(!is.na(value)) |>
mutate(name = str_replace_all(name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"),
name = str_replace_all(name,
"wildareas-v3-2009-human-footprint",
"human-footprint09")) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
labs(x = "", y = "", fill = "", title = "Switzerland") +
coord_flip()
df_full |>
filter(country == "South Korea") |>
select(where(is.numeric), -c(shrubs)) |>
correlate(method = "spearman") |>
filter(term == "bloom_doy") |>
pivot_longer(cols = -term) |>
filter(!is.na(value)) |>
mutate(name = str_replace_all(name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"),
name = str_replace_all(name,
"wildareas-v3-2009-human-footprint",
"human-footprint09")) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
labs(x = "", y = "", fill = "", title = "South Korea") +
coord_flip()
df_full |>
filter(location != "NPN") |>
select(where(is.numeric), -c(shrubs)) |>
correlate(method = "spearman") |>
filter(term == "bloom_doy") |>
pivot_longer(cols = -term) |>
filter(!is.na(value)) |>
mutate(name = str_replace_all(name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"),
name = str_replace_all(name,
"wildareas-v3-2009-human-footprint",
"human-footprint09")) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.key.size = unit(0.85, "cm")) +
labs(x = "", y = "", fill = "", title = "All the countries") +
coord_flip()bloom_doy.bloom_doy (ya que esta variable sí cambia con el tiempo).df_full |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Annual precipitation (mm)",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Annual precipitation (mm)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Annual precipitation (mm)",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)df_full |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)df_full |>
filter(`soc_0-5cm_mean` > 0) |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `soc_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Soil organic carbon (dg/kg)",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`soc_0-5cm_mean` > 0) |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `soc_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Soil organic carbon (dg/kg)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`soc_0-5cm_mean` > 0) |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `soc_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Soil organic carbon (dg/kg)",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)df_full |>
filter(`bdod_0-5cm_mean` > 0) |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `bdod_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Bulk density (cg/cm³)",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`bdod_0-5cm_mean` > 0) |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `bdod_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Bulk density (cg/cm³)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`bdod_0-5cm_mean` > 0) |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio1, y = `bdod_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual mean temperature (°C)",
y = "Bulk density (cg/cm³)",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)df_full |>
filter(`nitrogen_0-5cm_mean` > 0) |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio12, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual precipitation (mm)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`nitrogen_0-5cm_mean` > 0) |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio12, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual precipitation (mm)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(`nitrogen_0-5cm_mean` > 0) |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = bio12, y = `nitrogen_0-5cm_mean`, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(x = "Annual precipitation (mm)",
y = "Nitrogen (cg/kg)",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)df_full |>
filter(country == "Japan") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = alt, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(y = "Annual precipitation (mm)",
x = "Altitude",
color = "DOY (median)",
title = "Japan") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "Switzerland") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = alt, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(y = "Annual precipitation (mm)",
x = "Altitude)",
color = "DOY (median)",
title = "Switzerland") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
scale_x_log10() +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)
df_full |>
filter(country == "South Korea") |>
group_by(lat, long) |>
mutate(
median_doy = median(bloom_doy, na.rm = TRUE)
) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
ggplot(aes(x = alt, y = bio12, color = bloom_doy)) +
geom_point() +
scale_color_viridis_c(
trans = "log10",
breaks = trans_breaks(trans = "log10",
inv = function(x) round(10 ^ x, digits = 1))
) +
labs(y = "Annual precipitation (mm)",
x = "Altitude",
color = "DOY (median)",
title = "South Korea") +
theme(legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm")) +
geom_smooth(method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5)En este caso como las variables son numéricas simplemento multiplico las dos variables bajo análisis para evaluar el efecto de interacción. Teniendo en cuenta que bloom_doy es la variable respuesta \((y)\) y si ejemplificamos un modelo sencillo que incluye las variables temperatura anual \((x_1)\) y precipitación \((x_2)\) como predictoras, mi intención es validar si el efecto acumulativo o conjunto está presente. Podemos estimar el efecto marginal de \(x_1\) y definirlo como \(\beta_1\), lo propio podemos hacer con el efecto marginal de \(x_2\) y definirlo como \(\beta_2\), sin embargo, también es de interés evaluar el efecto de interacción \((\beta_3)x_1x_2\), en ese orden de ideas se puede expresar el modelo matamático como se muestra a continuación:
\[y = \beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_3x_1x_2 + error\]
Un primer resultado interesante de las interaccione evaluadas es que el comportamiento discrepa entre países, por ejemplo, la interacción temperatura-precipitación muestra una relación inversa con bloom_doy, pero no es lineal en los tres países, en Corea del Sur la relación tiende a ser cuadrática. Algo similar podemos observar en la interacción temperatura-nitrógeno, en Japón y Corea del Sur no exhibe ninguna relación con la variable objetivo, sin embargo, en Suiza se evidencia una relación lineal inversa. En los tres países la interacción temperatura-bulk density se relaciona de forma negativa con la variable respuesta, pero en Japón es evidente la dependencia lineal que existe entre la variable \(y\) y la predictora derivada de esta interacción.
Más adelante (en el documento 06-EDA2-FE.qmd) hago uso de la regresión lasso para seleccionar predictores y evaluar posibles efectos de interacción que no fueron representados en este análisis exploratorio.
countries <- c("Japan", "Switzerland", "South Korea")
df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Annual precipitation (mm)",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Annual precipitation (mm)",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Annual precipitation (mm)",
y = "DOY",
title = countries[3])df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[3])df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `soc_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Soil organic carbon (dg/kg)",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `soc_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Soil organic carbon (dg/kg)",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `soc_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Soil organic carbon (dg/kg)",
y = "DOY",
title = countries[3])df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `bdod_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Bulk density (cg/cm³)",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `bdod_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Bulk density (cg/cm³)",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio1 * `bdod_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual mean temperature (°C) * Bulk density (cg/cm³)",
y = "DOY",
title = countries[3])df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio12 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual precipitation (mm) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio12 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual precipitation (mm) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = bio12 * `nitrogen_0-5cm_mean`) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Annual precipitation (mm) * Nitrogen (cg/kg)",
y = "DOY",
title = countries[3])df_full |>
filter(country == countries[1]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = alt * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Altitude * Annual precipitation (mm) ",
y = "DOY",
title = countries[1])
df_full |>
filter(country == countries[2]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = alt * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Altitude * Annual precipitation (mm) ",
y = "DOY",
title = countries[2])
df_full |>
filter(country == countries[3]) |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
mutate(var_inter = alt * bio12) |>
ggplot(aes(x = var_inter, y = bloom_doy)) +
geom_point() +
scale_x_log10() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "Altitude * Annual precipitation (mm) ",
y = "DOY",
title = countries[3])bloom_doy (ya que esta variable sí cambia con el tiempo).Q1: valores de bloom_doy inferiores al valor del cuartil 1: \(DOY < Q1\).Q2: valores de bloom_doy mayores o iguales al valor del cuartil 1 y menores al valor del cuartil 2: \(Q1 \leq DOY < Q2\).Q3: valores de bloom_doy mayores o iguales al valor del cuartil 2 y menores al valor del cuartil 3: \(Q2 \leq DOY < Q3\).Q4: valores de bloom_doy superiores o iguales al valor del cuartil 3: \(DOY \geq Q3\).doy_categ la introduzco al PCA como variable suplementaría cualitativa.DOY < Q1) y las que se tardan más del Q3 (DOY >= Q3) exhiben diferencias al graficarlas en los tres primeros componentes principales, quedando ubicadas en lugares diferentes. Es importante mencionar que el patrón es menos visible en Corea del Sur. Floraciones que están entre el el Q1 y Q3 tienden a ser similares. Esta diferenciación que se aprecia entre floraciones precoces y floraciones tardías podría constituirse en un indicativo de perfiles diferenciales en el nicho ecológico o entorno ambiental al cual están expuestos los cerezos.df_pca_japan1 <-
df_full |>
filter(country == countries[1])
value_q1 <-
quantile(df_pca_japan1$bloom_doy, probs = 0.25, na.rm = TRUE)
value_q2 <-
quantile(df_pca_japan1$bloom_doy, probs = 0.50, na.rm = TRUE)
value_q3 <-
quantile(df_pca_japan1$bloom_doy, probs = 0.75, na.rm = TRUE)
order_categ <-
c("DOY < Q1",
"Q1 <= DOY < Q2",
"Q2 <= DOY < Q3",
"DOY >= Q3")
df_pca_japan2 <-
df_pca_japan1 |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
select(-shrubs) |>
mutate(
doy_categ = case_when(
median_doy < value_q1 ~ "DOY < Q1",
median_doy >= value_q1 &
median_doy < value_q2 ~ "Q1 <= DOY < Q2",
median_doy >= value_q2 &
median_doy < value_q3 ~ "Q2 <= DOY < Q3",
median_doy >= value_q3 ~ "DOY >= Q3",
),
doy_categ = factor(doy_categ, levels = order_categ)
) |>
select(-c(location, country, bloom_date, bloom_doy, median_doy)) |>
relocate(doy_categ, everything())
pca_japan <- PCA(X = df_pca_japan2, graph = FALSE, quali.sup = 1)
df_pca_japan2$pc1 <- pca_japan$ind$coord[, 1]
df_pca_japan2$pc2 <- pca_japan$ind$coord[, 2]
df_pca_japan2$pc3 <- pca_japan$ind$coord[, 3]
eigen_pc1 <- pca_japan$eig[, 2][1] |> round(digits = 1)
eigen_pc2 <- pca_japan$eig[, 2][2] |> round(digits = 1)
eigen_pc3 <- pca_japan$eig[, 2][3] |> round(digits = 1)
df_pca_japan2 |>
ggplot(aes(x = pc1, y = pc2, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 2 ({eigen_pc2}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 2")
df_pca_japan2 |>
select(-c(doy_categ)) |>
correlate(method = "pearson") |>
filter(term %in% c("pc1", "pc2", "pc3")) |>
select(-c(pc1, pc2, pc3)) |>
pivot_longer(-term) |>
mutate(
name = str_replace_all(
name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"
),
name = str_replace_all(
name,
"wildareas-v3-2009-human-footprint",
"human-footprint09"
)
) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
labs(x = "", y = "Component", fill = "Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5),
legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm"))
df_pca_japan2 |>
ggplot(aes(x = pc1, y = pc3, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 3 ({eigen_pc3}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 3")df_pca_switzerland1 <-
df_full |>
filter(country == countries[2])
value_q1 <-
quantile(df_pca_switzerland1$bloom_doy, probs = 0.25, na.rm = TRUE)
value_q2 <-
quantile(df_pca_switzerland1$bloom_doy, probs = 0.50, na.rm = TRUE)
value_q3 <-
quantile(df_pca_switzerland1$bloom_doy, probs = 0.75, na.rm = TRUE)
order_categ <-
c("DOY < Q1",
"Q1 <= DOY < Q2",
"Q2 <= DOY < Q3",
"DOY >= Q3")
df_pca_switzerland2 <-
df_pca_switzerland1 |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
select(-shrubs) |>
mutate(
doy_categ = case_when(
median_doy < value_q1 ~ "DOY < Q1",
median_doy >= value_q1 &
median_doy < value_q2 ~ "Q1 <= DOY < Q2",
median_doy >= value_q2 &
median_doy < value_q3 ~ "Q2 <= DOY < Q3",
median_doy >= value_q3 ~ "DOY >= Q3",
),
doy_categ = factor(doy_categ, levels = order_categ)
) |>
select(-c(location, country, bloom_date, bloom_doy, median_doy)) |>
relocate(doy_categ, everything())
pca_switzerland <- PCA(X = df_pca_switzerland2, graph = FALSE, quali.sup = 1)
df_pca_switzerland2$pc1 <- pca_switzerland$ind$coord[, 1]
df_pca_switzerland2$pc2 <- pca_switzerland$ind$coord[, 2]
df_pca_switzerland2$pc3 <- pca_switzerland$ind$coord[, 3]
eigen_pc1 <- pca_switzerland$eig[, 2][1] |> round(digits = 1)
eigen_pc2 <- pca_switzerland$eig[, 2][2] |> round(digits = 1)
eigen_pc3 <- pca_switzerland$eig[, 2][3] |> round(digits = 1)
df_pca_switzerland2 |>
ggplot(aes(x = pc1, y = pc2, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 2 ({eigen_pc2}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 2")
df_pca_switzerland2 |>
select(-c(doy_categ)) |>
correlate(method = "pearson") |>
filter(term %in% c("pc1", "pc2", "pc3")) |>
select(-c(pc1, pc2, pc3)) |>
pivot_longer(-term) |>
mutate(
name = str_replace_all(
name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"
),
name = str_replace_all(
name,
"wildareas-v3-2009-human-footprint",
"human-footprint09"
)
) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
labs(x = "", y = "Component", fill = "Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5),
legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm"))
df_pca_switzerland2 |>
ggplot(aes(x = pc1, y = pc3, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 3 ({eigen_pc3}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 3")df_pca_southk1 <-
df_full |>
filter(country == countries[3])
value_q1 <-
quantile(df_pca_southk1$bloom_doy, probs = 0.25, na.rm = TRUE)
value_q2 <-
quantile(df_pca_southk1$bloom_doy, probs = 0.50, na.rm = TRUE)
value_q3 <-
quantile(df_pca_southk1$bloom_doy, probs = 0.75, na.rm = TRUE)
order_categ <-
c("DOY < Q1",
"Q1 <= DOY < Q2",
"Q2 <= DOY < Q3",
"DOY >= Q3")
df_pca_southk2 <-
df_pca_southk1 |>
group_by(lat, long) |>
mutate(median_doy = median(bloom_doy, na.rm = TRUE)) |>
ungroup() |>
distinct(lat, long, .keep_all = TRUE) |>
select(-shrubs) |>
mutate(
doy_categ = case_when(
median_doy < value_q1 ~ "DOY < Q1",
median_doy >= value_q1 &
median_doy < value_q2 ~ "Q1 <= DOY < Q2",
median_doy >= value_q2 &
median_doy < value_q3 ~ "Q2 <= DOY < Q3",
median_doy >= value_q3 ~ "DOY >= Q3",
),
doy_categ = factor(doy_categ, levels = order_categ)
) |>
select(-c(location, country, bloom_date, bloom_doy, median_doy)) |>
relocate(doy_categ, everything())
pca_southk <- PCA(X = df_pca_southk2, graph = FALSE, quali.sup = 1)
df_pca_southk2$pc1 <- pca_southk$ind$coord[, 1]
df_pca_southk2$pc2 <- pca_southk$ind$coord[, 2]
df_pca_southk2$pc3 <- pca_southk$ind$coord[, 3]
eigen_pc1 <- pca_southk$eig[, 2][1] |> round(digits = 1)
eigen_pc2 <- pca_southk$eig[, 2][2] |> round(digits = 1)
eigen_pc3 <- pca_southk$eig[, 2][3] |> round(digits = 1)
df_pca_southk2 |>
ggplot(aes(x = pc1, y = pc2, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 2 ({eigen_pc2}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 2")
df_pca_southk2 |>
select(-c(doy_categ)) |>
correlate(method = "pearson") |>
filter(term %in% c("pc1", "pc2", "pc3")) |>
select(-c(pc1, pc2, pc3)) |>
pivot_longer(-term) |>
mutate(
name = str_replace_all(
name,
"wildareas-v3-1993-human-footprint",
"human-footprint93"
),
name = str_replace_all(
name,
"wildareas-v3-2009-human-footprint",
"human-footprint09"
)
) |>
ggplot(aes(x = reorder(name, value), y = term, fill = value)) +
geom_tile() +
scale_fill_gradient2(
low = muted("red"),
mid = "white",
high = muted("dodgerblue2"),
midpoint = 0
) +
labs(x = "", y = "Component", fill = "Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5),
legend.key.size = unit(1, "cm"),
legend.key.width = unit(2, "cm"))
df_pca_southk2 |>
ggplot(aes(x = pc1, y = pc3, color = doy_categ)) +
geom_point(size = 2.5, alpha = 0.75, shape = 18) +
geom_hline(yintercept = 0, lty = 2, color = "firebrick3") +
geom_vline(xintercept = 0, lty = 2, color = "firebrick3") +
labs(x = glue("Component 1 ({eigen_pc1}%)"),
y = glue("Component 3 ({eigen_pc3}%)")) +
scale_color_manual(values = c("dodgerblue3", "gray80", "gray80", "forestgreen")) +
labs(color = "", title = "Component 1 vs Component 3")shrubs porque todos sus valores están en cero.Phenophase_DescriptionSpecies_IDGenusCommon_NameKingdomPhenophase_IDPhenophase_DescriptionNumDays_Since_Prior_NoFirst_Yes_YearFirst_Yes_MonthFirst_Yes_DayFirst_Yes_Julian_DateLast_Yes_Julian_DateLast_Yes_YearLast_Yes_MonthLast_Yes_DayLast_Yes_Julian_DateNumDays_Until_Next_NoAGDD_in_FObservation_DateLast_Yes_DOYFirst_Yes_DOYFirst_Yes_DOY y Last_Yes_DOY en los datos USA-NPN_individual_phenometrics_data.csv. Estas dos variables también hacen parte de la lista de variables excluidas.# External data NPN
data_npn_fixed <-
df_full |>
filter(country == "USA-NPN") |>
select(-contains("bio"),
-c(shrubs, location, country, year, bloom_date,
bloom_doy, alt)) |>
distinct(lat, long, .keep_all = TRUE)
# NPN
var_drop <-
c(
"Phenophase_Description",
"Species_ID",
"Genus",
"Common_Name",
"Kingdom",
"Phenophase_ID",
"Phenophase_Description",
"NumDays_Since_Prior_No",
"First_Yes_Year",
"First_Yes_Month",
"First_Yes_Day",
"First_Yes_Julian_Date",
"Last_Yes_Julian_Date",
"Last_Yes_Year",
"Last_Yes_Month",
"Last_Yes_Day",
"Last_Yes_Julian_Date",
"NumDays_Until_Next_No",
"AGDD_in_F",
"Last_Yes_DOY",
"First_Yes_DOY"
)
data_npn1 <-
read_csv("../data/USA-NPN_individual_phenometrics_data.csv") |>
mutate(
Observation_Date = str_c(First_Yes_Month, "-", First_Yes_Day, "-", First_Yes_Year),
Observation_Date = mdy(Observation_Date),
bloom_doy = (Last_Yes_DOY + First_Yes_DOY) / 2
) |>
distinct_all() |>
rename(lat = Latitude,
long = Longitude,
alt = Elevation_in_Meters) |>
select(!all_of(var_drop)) |>
relocate(Observation_Date,
Site_ID,
Individual_ID,
Species,
bloom_doy,
everything())
data_npn2 <-
read_csv("../data/USA-NPN_status_intensity_observations_data.csv") |>
mutate(
Observation_Date = mdy(Observation_Date),
bloom_doy = Day_of_Year
) |>
distinct_all() |>
rename(lat = Latitude,
long = Longitude,
alt = Elevation_in_Meters) |>
select(all_of(names(data_npn1))) |>
relocate(Observation_Date,
Site_ID,
Individual_ID,
Species,
bloom_doy,
everything())
# Join data NPN + data "fixed" or "static"
data_npn <- bind_rows(data_npn1, data_npn2) |>
left_join(data_npn_fixed, by = c("lat", "long")) |>
distinct_all()
# Countries vector
countries <-
c(
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"Canada",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"Canada",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"China",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"Japan",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"Canada",
"USA",
"USA",
"USA",
"USA",
"Canada",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"China",
"USA",
"USA",
"China",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"USA",
"Canada",
"USA",
"USA",
"USA",
"USA",
"Canada",
"Canada",
"USA"
)
# Cities vector
cities <-
c(
"Fayetteville, North Carolina",
"Raleigh, North Carolina",
"Washington D.C.",
"Knoxville, Tennessee",
"San francisco California",
"Roanoke, Virginia",
"Burlington, Vermont",
"Gatlinburg, Tennessee",
"Pigeon Forge, Tennessee",
"Denver, Colorado",
"New York, New York",
"Evansville, Indiana",
"Boston, Massachusetts",
"Fargo, North Dakota",
"Charlotte, North Carolina",
"Kansas City, Missouri",
"New York, New York",
"Duluth, Minnesota",
"Thunder Bay, Ontario",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Newport, Oregon",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Washington D.C.",
"Alexandria, Virginia",
"Alexandria, Virginia",
"New York, New York",
"New York, New York",
"New York, New York",
"Alexandria, Virginia",
"New York, New York",
"Minneapolis, Minnesota",
"London, Kentucky",
"Hagerstown, Maryland",
"London, Kentucky",
"Nashville, Tennessee",
"Philadelphia, Pennsylvania",
"Allentown, Pennsylvania",
"New York, New York",
"Sudbury, Ontario",
"Philadelphia, Pennsylvania",
"Roanoke, Virginia",
"Denver, Colorado",
"Lexington, Kentucky",
"Indianapolis, Indiana",
"Denver, Colorado",
"Chengdu, Sichuan",
"New York, New York",
"Sacramento, California",
"Asheville, North Carolina",
"New York, New York",
"Salem, Oregon",
"Seattle, Washington",
"Seattle, Washington",
"Salem, Oregon",
"Portland, Oregon",
"Seattle, Washington",
"Seattle, Washington",
"Tacoma, Washington",
"Muncie, Ind.",
"Kyoto, Kyoto",
"Redmond, Washington",
"Salem, Oregon",
"Northampton, Massachusetts",
"Asheville, North Carolina",
"Salem, Oregon",
"Raleigh, North Carolina",
"Albany, New York",
"Duluth, Minnesota",
"New York, New York",
"New York, New York",
"Providence, Rhode Island",
"Seattle, Washington",
"Greeley, Colorado",
"Lexington, Kentucky",
"Portland, Oregon",
"Columbus, Ohio",
"North Bend, Washington",
"Denver, Colorado",
"Seattle, Washington",
"Sacramento, California",
"Klamath Falls, Oregon",
"Reno, Nevada",
"Edmonton, Alberta",
"Eugene, Oregon",
"Corvallis, Oregon",
"Longview, Washington",
"Port Angeles, Washington",
"Vancouver, British Columbia",
"Salem, Oregon",
"Albany, Oregon",
"Portland, Oregon",
"Lebanon, Virginia",
"Asheville, North Carolina",
"Boston, Massachusetts",
"Baltimore, Maryland",
"Newark, New Jersey",
"San francisco California",
"New York, New York",
"Okinawa",
"Lexington, Kentucky",
"New York, New York",
"Dalian, Liaoning",
"Fayetteville, North Carolina",
"Raleigh, North Carolina",
"Goldsboro, North Carolina",
"Buffalo, New York",
"Newburgh, New York",
"Amherst, Massachusetts",
"Buffalo, New York",
"New York, New York",
"Washington D.C.",
"Providence, Rhode Island",
"Cambridge, Massachusetts",
"Atlantic City, New Jersey",
"Newport, Oregon",
"Lexington, Kentucky",
"New York, New York",
"Portland, Oregon",
"Greenfield, Massachusetts",
"Riverhead, New York",
"Raleigh, North Carolina",
"New York, New York",
"New York, New York",
"Allentown, Pennsylvania",
"Williamsport, Pennsylvania",
"Baltimore, Maryland",
"Salt Lake City, Utah",
"Lexington, Kentucky",
"Providence, Rhode Island",
"Newark, New Jersey",
"Harrisburg, Pennsylvania",
"Philadelphia, Pennsylvania",
"Johnson City, Tennessee",
"Syracuse, New York",
"Wilmington, Delaware",
"Vancouver, British Columbia",
"Chicago, Illinois",
"Burlington, Vermont",
"Boston, Massachusetts",
"Portland, Oregon",
"Toronto, Ontario",
"Niagara Falls, Ontario",
"Eugene, Oregon"
)
data_npn <-
data_npn |>
left_join(
data_npn |>
distinct(State, lat, long) |>
mutate(country = countries,
city = cities),
by = c("State", "lat", "long")
)
# Export data
write_parquet(data_npn, "../external-data/df_npn_usa.parquet")
data_npn |> head()data_npn_fixed |>
pivot_longer(cols = where(is.numeric)) |>
ggplot(aes(x = value)) +
facet_wrap(~name, scales = "free", ncol = 4) +
geom_histogram(color = "black")data_npn |>
select(!all_of(names(data_npn_fixed)))|>
select(-c(Site_ID, Individual_ID)) |>
pivot_longer(cols = where(is.numeric)) |>
filter(value > -100) |>
ggplot(aes(x = value)) +
facet_wrap(~name, scales = "free", ncol = 4) +
geom_histogram(color = "black")data_npn |>
ggplot(aes(x = bloom_doy)) +
facet_wrap(~Species, ncol = 1, scales = "free_y") +
geom_histogram(color = "black") +
geom_rug() +
labs(x = "Bloom DOY", y = "Count",
title = "DOY distribution by specie",
subtitle = "Original scale")
data_npn |>
ggplot(aes(x = bloom_doy)) +
facet_wrap(~Species, ncol = 1, scales = "free_y") +
geom_histogram(color = "black") +
geom_rug() +
scale_y_log10() +
labs(x = "Bloom DOY", y = "Count",
title = "DOY distribution by specie",
subtitle = "Original scale")State.list_species <- unique(data_npn$Species)data_npn |>
filter(Species == list_species[1]) |>
mutate(State = str_c(State, " - ", city)) |>
group_by(State) |>
mutate(flag_n = n()) |>
ungroup() |>
filter(flag_n >= 5) |>
ggplot(aes(x = State, y = bloom_doy)) +
geom_boxplot() +
labs(
y = "Bloom DOY",
x = "",
title = "DOY distribution by state",
subtitle = glue("{list_species[1]}")
) +
coord_flip() +
scale_y_log10()data_npn |>
filter(Species == list_species[2]) |>
mutate(State = str_c(State, " - ", city)) |>
group_by(State) |>
mutate(flag_n = n()) |>
ungroup() |>
filter(flag_n >= 5) |>
ggplot(aes(x = State, y = bloom_doy)) +
geom_boxplot() +
labs(
y = "Bloom DOY",
x = "",
title = "DOY distribution by state",
subtitle = glue("{list_species[2]}")
) +
coord_flip() +
scale_y_log10()data_npn |>
filter(Species == list_species[3]) |>
mutate(State = str_c(State, " - ", city)) |>
group_by(State) |>
mutate(flag_n = n()) |>
ungroup() |>
filter(flag_n >= 5) |>
ggplot(aes(x = State, y = bloom_doy)) +
geom_boxplot() +
labs(
y = "Bloom DOY",
x = "",
title = "DOY distribution by state",
subtitle = glue("{list_species[3]}")
) +
coord_flip() +
scale_y_log10()data_npn |>
filter(Species == list_species[4]) |>
mutate(State = str_c(State, " - ", city)) |>
group_by(State) |>
mutate(flag_n = n()) |>
ungroup() |>
filter(flag_n >= 5) |>
ggplot(aes(x = State, y = bloom_doy)) +
geom_boxplot() +
labs(
y = "Bloom DOY",
x = "",
title = "DOY distribution by state",
subtitle = glue("{list_species[4]}")
) +
coord_flip() +
scale_y_log10()data_npn |>
filter(Species == list_species[5]) |>
mutate(State = str_c(State, " - ", city)) |>
group_by(State) |>
mutate(flag_n = n()) |>
ungroup() |>
filter(flag_n >= 5) |>
ggplot(aes(x = State, y = bloom_doy)) +
geom_boxplot() +
labs(
y = "Bloom DOY",
x = "",
title = "DOY distribution by state",
subtitle = glue("{list_species[5]}")
) +
coord_flip() +
scale_y_log10()data_npn |>
filter(Species == list_species[1]) |>
select(bloom_doy, contains("Tm")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[1]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[2]) |>
select(bloom_doy, contains("Tm")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[2]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[3]) |>
select(bloom_doy, contains("Tm")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[3]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[4]) |>
select(bloom_doy, contains("Tm")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[4]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[5]) |>
select(bloom_doy, contains("Tm")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[5]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[1]) |>
select(bloom_doy, contains("Prcp_")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[1]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[2]) |>
select(bloom_doy, contains("Prcp_")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[2]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[3]) |>
select(bloom_doy, contains("Prcp_")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[3]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[4]) |>
select(bloom_doy, contains("Prcp_")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[4]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[5]) |>
select(bloom_doy, contains("Prcp_")) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[5]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[1]) |>
select(bloom_doy, lat, long, alt, AGDD, Daylength) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 5) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[1]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[2]) |>
select(bloom_doy, lat, long, alt, AGDD, Daylength) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 5) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[2]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[3]) |>
select(bloom_doy, lat, long, alt, AGDD, Daylength) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 5) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[3]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[4]) |>
select(bloom_doy, lat, long, alt, AGDD, Daylength) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 5) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[4]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[5]) |>
select(bloom_doy, lat, long, alt, AGDD, Daylength) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 5) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[5]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[1]) |>
select(bloom_doy, `bdod_0-5cm_mean`:water) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[1]}")) +
scale_y_log10()data_npn |>
filter(Species == list_species[2]) |>
select(bloom_doy, `bdod_0-5cm_mean`:water) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[2]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[3]) |>
select(bloom_doy, `bdod_0-5cm_mean`:water) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[3]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[4]) |>
select(bloom_doy, `bdod_0-5cm_mean`:water) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[4]}")) +
scale_y_log10() data_npn |>
filter(Species == list_species[5]) |>
select(bloom_doy, `bdod_0-5cm_mean`:water) |>
pivot_longer(cols = -c(bloom_doy)) |>
filter(value > -100) |>
ggplot(aes(x = value, y = bloom_doy)) +
facet_wrap( ~ name, scales = "free", ncol = 4) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ ns(x, df = 2),
color = "red",
size = 0.5
) +
labs(x = "", y = "DOY",
title = glue("{list_species[5]}")) +
scale_y_log10()